# from gensim.models import Word2Vec
# import jieba
# raw_sentences=[jieba.lcut("我爱北京天安门"),jieba.lcut("我喜欢出去玩"),jieba.lcut("我热爱读书")]
# print(raw_sentences)
# model = Word2Vec(raw_sentences,min_count=1)
# print(model.wv.similarity("天安门","爱"))

# Segment the input text with jieba.
# Imports hoisted above first use (they previously sat between the file
# read and the segmentation call).
import jieba
import jieba.posseg as pesg  # NOTE(review): unused here; kept in case later code does POS tagging

# Read the corpus — assumes a UTF-8 file named "111.txt" in the CWD.
with open("111.txt", 'r', encoding="utf-8") as f:
    text = f.read()

# Tokenize and show the segmentation, tokens joined by "/".
words = jieba.lcut(text)
print("/".join(words))



# Word-frequency report.
from collections import Counter

counts = Counter(words)
# most_common() yields (word, count) pairs sorted most-frequent-first,
# which is the useful order for a frequency report (the original printed
# in arbitrary insertion order).
for word, count in counts.most_common():
    print(f"{word}:{count}")
